Load Packages

library(Seurat)
## Attaching SeuratObject
## Attaching sp
library(magrittr) 
library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggplot2)
library(cowplot)

Load Data. This data contains only Endothelial cells. These have been selected from a larger object.

Samples include Lean, Obese and Unhealthy Obese this is “condition”

endo_cells <- readRDS(file ="/home/lucamannino/R_Projects/Intagration_endoCellAtlas/endo_cells.RDS")

We build a new UMAP plot to look at the data We use the integrated assay for this step

DefaultAssay(endo_cells) <- "integrated"

endo_cells <- FindNeighbors(endo_cells, dims = 1:25)
## Computing nearest neighbor graph
## Computing SNN
endo_cells <- RunUMAP(endo_cells, dims = 1:25)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:47:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:47:07 Read 27325 rows and found 25 numeric columns
## 12:47:07 Using Annoy for neighbor search, n_neighbors = 30
## 12:47:07 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:47:12 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f6445c235
## 12:47:12 Searching Annoy index using 1 thread, search_k = 3000
## 12:47:22 Annoy recall = 100%
## 12:47:23 Commencing smooth kNN distance calibration using 1 thread
## 12:47:24 Initializing from normalized Laplacian + noise
## 12:47:25 Commencing optimization for 200 epochs, with 1313600 positive edges
## 12:47:37 Optimization finished
endo_cells <- FindClusters(endo_cells, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 27325
## Number of edges: 1089437
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8631
## Number of communities: 15
## Elapsed time: 5 seconds

There are several patients included in this study, the patients labels are recorded under [[“orig.ident”]].

Patients have been categorized by their condition: Obese, Unhelthy Obese and Lean ([[“condition”]]).

From each patient subcutaneous and visceral cells were analysed, which cells belong to which tissue is recorded under [[“type”]]

DimPlot(endo_cells, label=TRUE, group.by = "type")

DimPlot(endo_cells, label=TRUE, group.by = "condition")

DimPlot(endo_cells, label=TRUE, group.by = "orig.ident")

In the following plot we look at specific markers to further clean the object from non endothelial cells For this we use the SCT assay

DefaultAssay(endo_cells) <- "SCT"
FeaturePlot(endo_cells, c("PECAM1","CDH5","CLU","VWF"))

DotPlot(endo_cells, features = c("PECAM1","CDH5","CLU","VWF"), assay="SCT")

DotPlot(endo_cells, features = c("FLRT2","ACSL4","COL8A1","CACNB2"), assay="SCT")

There are several patients included in this study, the patients labels are recorded under [[“orig.ident”]].

Patients have been categorized by their condition: Obese, Unhelthy Obese and Lean ([[“condition”]]).

From each patient subcutaneous and visceral cells were analysed, which cells belong to which tissue is recorded under [[“type”]]

DimPlot(endo_cells, label=TRUE, group.by = "type")

DimPlot(endo_cells, label=TRUE, group.by = "condition")

DimPlot(endo_cells, label=TRUE, group.by = "orig.ident")

we remove clusters 0,1,2,13 as they are not endothelial

EndoCells1 <- subset(endo_cells, idents = c(3,4,5,6,7,8,9,10,11,12,14))

New Umap plot is produced

DefaultAssay(EndoCells1) <- "integrated"

EndoCells1 <- FindNeighbors(EndoCells1, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
EndoCells1 <- RunUMAP(EndoCells1, dims = 1:20)
## 12:48:06 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:48:06 Read 13643 rows and found 20 numeric columns
## 12:48:06 Using Annoy for neighbor search, n_neighbors = 30
## 12:48:06 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:48:09 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f1ea932e1
## 12:48:09 Searching Annoy index using 1 thread, search_k = 3000
## 12:48:14 Annoy recall = 100%
## 12:48:14 Commencing smooth kNN distance calibration using 1 thread
## 12:48:15 Initializing from normalized Laplacian + noise
## 12:48:16 Commencing optimization for 200 epochs, with 643354 positive edges
## 12:48:22 Optimization finished
EndoCells1 <- FindClusters(EndoCells1, resolution = 3.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 13643
## Number of edges: 558064
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7377
## Number of communities: 43
## Elapsed time: 1 seconds

Check for immune cells

DefaultAssay(EndoCells1) <- "integrated"
EndoCells1 <- FindClusters(EndoCells1, resolution = 1.0)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 13643
## Number of edges: 558064
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8496
## Number of communities: 15
## Elapsed time: 1 seconds
DefaultAssay(EndoCells1) <- "SCT"

FeaturePlot(EndoCells1,  c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

FeaturePlot(EndoCells1,  c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

DimPlot(EndoCells1, label=TRUE)

DotPlot(EndoCells1,features = c("CD163","MRC1","PTPRC","CDH5","PECAM1","LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))

DimPlot(EndoCells1, label=TRUE, group.by = "type")

DimPlot(EndoCells1, label=TRUE, group.by = "condition")

DimPlot(EndoCells1, label=TRUE, group.by = "orig.ident")

removing clusters 7 and 8 and 14

EndoCells2 <- subset(EndoCells1, idents = c(0,1,2,3,4,5,6,9,10,11,12,13))

In this step we decided to carry out a new SCT transform and a new integration in order to obtain better results. As we have removed a lot of cells from the original SCT and integration assay it is usefull to run the integration and the normalisation again.

To acheve this we split the object in its original samples, run SCT transform on the “SOUP” assay. We will then integrate the object by sample.

EndoCells2.list <- SplitObject(EndoCells2, split.by = "orig.ident")

for (i in 1:length(EndoCells2.list)) {
    EndoCells2.list[[i]] <- SCTransform(EndoCells2.list[[i]], assay = "soup", variable.features.n = 3000, verbose = FALSE)
}

need to use: options(future.globals.maxSize = 2500 * 1024^2) in order to increase the maximum allowed size for the integration algorithm

options(future.globals.maxSize = 2500 * 1024^2)
EndoCells2.list.features <- SelectIntegrationFeatures(object.list = EndoCells2.list, nfeatures = 3000)
EndoCells2.list <- PrepSCTIntegration(object.list = EndoCells2.list, anchor.features = EndoCells2.list.features, 
    verbose = FALSE)
options(future.globals.maxSize = 2500 * 1024^2)
EndoCells2.list.anchors <- FindIntegrationAnchors(object.list = EndoCells2.list, normalization.method = "SCT", 
    anchor.features = EndoCells2.list.features, verbose = FALSE)
EndoCells2.list.integrated <- IntegrateData(anchorset = EndoCells2.list.anchors, normalization.method = "SCT", 
    verbose = FALSE)

We produce a new UMAP plot from the integrated data

DefaultAssay(EndoCells2.list.integrated) <- "integrated"
EndoCells2.list.integrated <- RunPCA(EndoCells2.list.integrated, npcs = 40, verbose = FALSE)
EndoCells2.list.integrated <- FindNeighbors(EndoCells2.list.integrated, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
EndoCells2.list.integrated <- RunUMAP(EndoCells2.list.integrated, dims = 1:20)
## 13:02:52 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:02:52 Read 11622 rows and found 20 numeric columns
## 13:02:52 Using Annoy for neighbor search, n_neighbors = 30
## 13:02:52 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:02:54 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f4814034c
## 13:02:54 Searching Annoy index using 1 thread, search_k = 3000
## 13:02:57 Annoy recall = 100%
## 13:02:57 Commencing smooth kNN distance calibration using 1 thread
## 13:02:58 Initializing from normalized Laplacian + noise
## 13:02:59 Commencing optimization for 200 epochs, with 514826 positive edges
## 13:03:04 Optimization finished
EndoCells2.list.integrated <- FindClusters(EndoCells2.list.integrated, resolution = 0.30)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11622
## Number of edges: 473471
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9103
## Number of communities: 8
## Elapsed time: 1 seconds

QC the new object

DefaultAssay(EndoCells2.list.integrated) <- "SCT"

DimPlot(EndoCells2.list.integrated, label=TRUE, group.by = "type")

DimPlot(EndoCells2.list.integrated, label=TRUE, group.by = "condition")

DimPlot(EndoCells2.list.integrated, label=TRUE, group.by = "orig.ident")

QC checks to see if we have any non endothelial cells left

DefaultAssay(EndoCells2.list.integrated) <- "SCT"
FeaturePlot(EndoCells2.list.integrated,  c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

FeaturePlot(EndoCells2.list.integrated,  c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

FeaturePlot(EndoCells2.list.integrated,  c("CDH5"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

DimPlot(EndoCells2.list.integrated, label=TRUE)

DotPlot(EndoCells2.list.integrated,features = c("CD163","MRC1","PTPRC","CDH5","PECAM1", "VCAM1", "LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))

We remove cluster 3 from the object

Strict2 <- subset(EndoCells2.list.integrated, idents = c(0,1,2,4,5,6,7))
DefaultAssay(Strict2) <- "integrated"
Strict2 <- FindNeighbors(Strict2, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
Strict2 <- RunUMAP(Strict2, dims = 1:20)
## 13:03:20 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:03:20 Read 10236 rows and found 20 numeric columns
## 13:03:20 Using Annoy for neighbor search, n_neighbors = 30
## 13:03:20 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:03:21 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f5d3a1ebf
## 13:03:21 Searching Annoy index using 1 thread, search_k = 3000
## 13:03:24 Annoy recall = 100%
## 13:03:24 Commencing smooth kNN distance calibration using 1 thread
## 13:03:25 Initializing from normalized Laplacian + noise
## 13:03:25 Commencing optimization for 200 epochs, with 452802 positive edges
## 13:03:30 Optimization finished
Strict2 <- FindClusters(Strict2, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10236
## Number of edges: 415950
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8695
## Number of communities: 12
## Elapsed time: 1 seconds
DefaultAssay(Strict2) <- "integrated"
Strict2 <- FindClusters(Strict2, resolution = 0.35)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10236
## Number of edges: 415950
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8929
## Number of communities: 8
## Elapsed time: 1 seconds
DefaultAssay(Strict2) <- "SCT"

DimPlot(Strict2, label=TRUE, group.by = "type")

DimPlot(Strict2, label=TRUE, group.by = "condition")

DimPlot(Strict2, label=TRUE, group.by = "orig.ident")

DimPlot(Strict2, label=TRUE)

DimPlot(Strict2, label=TRUE, split.by = "type")

FeaturePlot(Strict2, c("PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0, split.by ="type" )

FeaturePlot(Strict2,  c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

FeaturePlot(Strict2,  c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

FeaturePlot(Strict2,  c("ARL15"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

DimPlot(Strict2, label=TRUE)

DotPlot(Strict2,features = c("ARL15","MRC1","PTPRC","CDH5","PECAM1","LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))

To label the endothelial subclusters we use Find all marker function, and then use this marker genes to appropiately label subclusters

DefaultAssay(Strict2) <- "SCT"
combined.sct <- PrepSCTFindMarkers(Strict2)
## Found 8 SCT models. Recorrecting SCT counts using minimum median counts: 1216.34073055455
combined.markers <- FindAllMarkers(combined.sct, assay = "SCT",    verbose = FALSE, only.pos = TRUE,  logfc.threshold = 0.05,)
## For a more efficient implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the limma package
## --------------------------------------------
## install.packages('BiocManager')
## BiocManager::install('limma')
## --------------------------------------------
## After installation of limma, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
head(combined.markers, n = 15)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj cluster    gene
## PKHD1L1  0.000000e+00  1.3393360 0.427 0.084  0.000000e+00       0 PKHD1L1
## RELN     0.000000e+00  1.0745094 0.360 0.071  0.000000e+00       0    RELN
## MMRN1    0.000000e+00  0.9988334 0.384 0.071  0.000000e+00       0   MMRN1
## PIEZO2  4.648381e-308  0.7442944 0.329 0.055 1.057600e-303       0  PIEZO2
## DOCK5   7.499172e-308  0.8076344 0.378 0.084 1.706212e-303       0   DOCK5
## PGM5    1.557859e-266  0.6269384 0.453 0.146 3.544440e-262       0    PGM5
## NRG3    3.081320e-260  0.8306883 0.319 0.064 7.010620e-256       0    NRG3
## VAV3    2.137763e-257  0.6256245 0.358 0.089 4.863838e-253       0    VAV3
## MPP7    8.685045e-255  0.6305471 0.276 0.045 1.976021e-250       0    MPP7
## PDE1A   1.074184e-241  0.9144833 0.343 0.088 2.443983e-237       0   PDE1A
## TFPI    1.054771e-240  0.7126664 0.625 0.305 2.399816e-236       0    TFPI
## TLL1    1.804908e-224  0.6305531 0.698 0.350 4.106526e-220       0    TLL1
## LYVE1   1.930487e-218  0.4650901 0.242 0.039 4.392244e-214       0   LYVE1
## PROX1   1.677030e-217  0.5227144 0.242 0.040 3.815578e-213       0   PROX1
## STAB2   7.975090e-213  0.3713843 0.206 0.025 1.814492e-208       0   STAB2
#We produce a table with top 50 marker genes per cluster to facilitate use
combined.markers %>%
    group_by(cluster) %>%
    slice_max(n = 50, order_by = avg_log2FC)
## # A tibble: 400 × 7
## # Groups:   cluster [8]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene     
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>    
##  1 0              1.34  0.427 0.084 0         0       PKHD1L1  
##  2 0              1.07  0.36  0.071 0         0       RELN     
##  3 1.50e-172      1.07  0.414 0.188 3.40e-168 0       EFNA5    
##  4 0              0.999 0.384 0.071 0         0       MMRN1    
##  5 1.07e-241      0.914 0.343 0.088 2.44e-237 0       PDE1A    
##  6 1.24e-122      0.887 0.578 0.418 2.82e-118 0       PPFIBP1  
##  7 3.08e-260      0.831 0.319 0.064 7.01e-256 0       NRG3     
##  8 1.69e-203      0.817 0.417 0.15  3.84e-199 0       LINC02147
##  9 7.50e-308      0.808 0.378 0.084 1.71e-303 0       DOCK5    
## 10 4.65e-308      0.744 0.329 0.055 1.06e-303 0       PIEZO2   
## # … with 390 more rows
#Following code is to produce a heatmap of the 20 top marker genes per cluster
combined.markers %>%
    group_by(cluster) %>%
    top_n(n = 20, wt = avg_log2FC) -> top20
DoHeatmap(combined.sct, features = top20$gene) + NoLegend()
## Warning in DoHeatmap(combined.sct, features = top20$gene): The following
## features were omitted as they were not found in the scale.data slot for the SCT
## assay: LPP, PTEN, PKHD1L1

genes <- c(“EFNB2”,“ARL15”,“NKAIN2”,‘GJA5’,‘IGFBP3’,‘FBLN5’,‘CD36’,‘CADM2’,‘FABP4’,‘NRP1’,‘BTNL9’,‘PPARG’,‘COL4A1’,‘PDE3B’,‘AQP7’,‘PLIN1’,‘IGF1’,‘LPL’,‘ANGPT1’,‘ACSL1’,‘ZNF385D’,‘ADAMTS9’,‘HDAC9’,‘COL15A1’,‘VCAN’,‘LRRC1’,‘PLCXD3’,‘POSTN’,‘RYR3’,‘SELE’,‘RELN’,‘MMRN1’,‘STOX2’,‘NRG1’,‘CD163’,‘MRC1’,‘CDH1’,‘FLRT2’,‘ACSL4’,‘COL8A1’,‘CACNB2’)

genes <- c("EFNB2","ARL15","NKAIN2",'GJA5','IGFBP3','FBLN5','CD36','CADM2','FABP4','NRP1','BTNL9','PPARG','COL4A1','PDE3B','AQP7','PLIN1','IGF1','LPL','ANGPT1','ACSL1','ZNF385D','ADAMTS9','HDAC9','COL15A1','VCAN','LRRC1','PLCXD3','POSTN','RYR3','SELE','RELN','MMRN1','STOX2','NRG1','CD163','MRC1','CDH1','FLRT2','ACSL4','COL8A1','CACNB2')
DefaultAssay(Strict2) <- "SCT"
FeaturePlot(Strict2, genes)

DotPlot(Strict2,features = genes, assay = "SCT",cols = c("blue", "red"))+RotatedAxis()

FindSubCluster( object, cluster, graph.name, subcluster.name = “sub.cluster”, resolution = 0.5, algorithm = 1 )

DefaultAssay(Strict2) <- "integrated"
Strict3 <- FindSubCluster(Strict2, 0, subcluster.name = "sub.cluster", graph.name = "integrated_nn",
  resolution = 0.2,
  algorithm = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3295
## Number of edges: 31455
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8250
## Number of communities: 13
## Elapsed time: 0 seconds
## 11 singletons identified. 2 final clusters.
FeaturePlot(Strict3, genes)
## Warning: Found the following features in more than one assay, excluding the
## default. We will not include these in the final data frame: PLIN1, SELE, CDH1
## Warning in FetchData.Seurat(object = object, vars = c(dims, "ident",
## features), : The following requested variables were not found: PLIN1, SELE, CDH1

DotPlot(Strict3,features = genes, assay = "SCT",cols = c("blue", "red"))+RotatedAxis()

DefaultAssay(Strict3) <- "SCT"
FeaturePlot(Strict3,  c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

FeaturePlot(Strict3,  c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

FeaturePlot(Strict3,  c("CDH5"), pt.size = 0.1,reduction ="umap",min.cutoff=0)

DimPlot(Strict3, label=TRUE, group.by="sub.cluster")

DotPlot(Strict3,features = c("CD163","MRC1","PTPRC","CDH5","PECAM1","LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))

stem.combined <- RenameIdents(object = stem.combined, 0 = “your cell type”, 1 = “your other cell type”, 2 = “your last cell type”)

Idents(Strict3) <- "sub.cluster"
Strict3 <- RenameIdents(object = Strict3, "0_0" = "Lymphatic", "0_1" = "Vein_1", `1` = "Artery", "2"="Capillary", "3"="Vein_2","4"="Unknown","5"="Vein_3", "6"="Capillary/Arteriole","7"="Endo_MT")
DimPlot(Strict3, label=TRUE)

DimPlot(Strict3, label=FALSE, group.by = "type")

DimPlot(Strict3, label=FALSE, group.by = "condition")

DimPlot(Strict3, label=FALSE, group.by = "orig.ident")

DimPlot(Strict3, label=FALSE, split.by = "type")

DefaultAssay(Strict3) <- "SCT"
combined.Strict3 <- PrepSCTFindMarkers(Strict3)
## Found 8 SCT models. Recorrecting SCT counts using minimum median counts: 1216.34073055455
combined.markers.Strict3 <- FindAllMarkers(combined.Strict3, assay = "SCT",    verbose = FALSE, only.pos = TRUE,  logfc.threshold = 0.05,)
head(combined.markers, n = 15)
##                 p_val avg_log2FC pct.1 pct.2     p_val_adj cluster    gene
## PKHD1L1  0.000000e+00  1.3393360 0.427 0.084  0.000000e+00       0 PKHD1L1
## RELN     0.000000e+00  1.0745094 0.360 0.071  0.000000e+00       0    RELN
## MMRN1    0.000000e+00  0.9988334 0.384 0.071  0.000000e+00       0   MMRN1
## PIEZO2  4.648381e-308  0.7442944 0.329 0.055 1.057600e-303       0  PIEZO2
## DOCK5   7.499172e-308  0.8076344 0.378 0.084 1.706212e-303       0   DOCK5
## PGM5    1.557859e-266  0.6269384 0.453 0.146 3.544440e-262       0    PGM5
## NRG3    3.081320e-260  0.8306883 0.319 0.064 7.010620e-256       0    NRG3
## VAV3    2.137763e-257  0.6256245 0.358 0.089 4.863838e-253       0    VAV3
## MPP7    8.685045e-255  0.6305471 0.276 0.045 1.976021e-250       0    MPP7
## PDE1A   1.074184e-241  0.9144833 0.343 0.088 2.443983e-237       0   PDE1A
## TFPI    1.054771e-240  0.7126664 0.625 0.305 2.399816e-236       0    TFPI
## TLL1    1.804908e-224  0.6305531 0.698 0.350 4.106526e-220       0    TLL1
## LYVE1   1.930487e-218  0.4650901 0.242 0.039 4.392244e-214       0   LYVE1
## PROX1   1.677030e-217  0.5227144 0.242 0.040 3.815578e-213       0   PROX1
## STAB2   7.975090e-213  0.3713843 0.206 0.025 1.814492e-208       0   STAB2
#We produce a table with top 50 marker genes per cluster to facilitate use
combined.markers.Strict3 %>%
    group_by(cluster) %>%
    slice_max(n = 50, order_by = avg_log2FC)
## # A tibble: 450 × 7
## # Groups:   cluster [9]
##    p_val avg_log2FC pct.1 pct.2 p_val_adj cluster   gene     
##    <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>     <chr>    
##  1     0       1.80 0.725 0.088         0 Lymphatic PKHD1L1  
##  2     0       1.61 0.686 0.175         0 Lymphatic EFNA5    
##  3     0       1.53 0.613 0.074         0 Lymphatic RELN     
##  4     0       1.44 0.812 0.4           0 Lymphatic PPFIBP1  
##  5     0       1.39 0.618 0.082         0 Lymphatic MMRN1    
##  6     0       1.39 0.576 0.089         0 Lymphatic PDE1A    
##  7     0       1.35 0.676 0.148         0 Lymphatic LINC02147
##  8     0       1.22 0.528 0.069         0 Lymphatic NRG3     
##  9     0       1.19 0.569 0.111         0 Lymphatic LINC02208
## 10     0       1.19 0.628 0.089         0 Lymphatic DOCK5    
## # … with 440 more rows
#Following code is to produce a heatmap of the 20 top marker genes per cluster

combined.markers.Strict3 %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(Strict3, features = top10$gene)  + theme(text = element_text(size = 7.5))
## Warning in DoHeatmap(Strict3, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## PKHD1L1

DefaultAssay(Strict3) <- "SCT"
VlnPlot(object = Strict3, features = c('MMRN1',"TLL1","ARL15","CADM2","NCKAP5","RGS6"))

library(Seurat) library(ggplot2) library(cowplot)

Load Seurat obj

pbmc <- readRDS(“data/pbmc_2k_v3_Seurat.rds”)

features <- c(“CD79A”, “MS4A1”, “CD8A”, “CD8B”, “LYZ”, “LGALS3”, “S100A8”, “GNLY”, “NKG7”, “KLRB1”, “FCGR3A”, “FCER1A”, “CST3”)

a <- VlnPlot(pbmc, features, stack = TRUE, sort = TRUE) + theme(legend.position = “none”) + ggtitle(“Identity on y-axis”)

b <- VlnPlot(pbmc, features, stack = TRUE, sort = TRUE, flip = TRUE) + theme(legend.position = “none”) + ggtitle(“Identity on x-axis”)

DefaultAssay(Strict3) <- "SCT"
features <- c('MMRN1',"TLL1","IGFBP3","CADM2","NCKAP5","RGS6","PDE3B")

a <- VlnPlot(Strict3, features, stack = TRUE, sort = TRUE) +
        theme(legend.position = "none") + ggtitle("Identity on y-axis")

b <- VlnPlot(Strict3, features, stack = TRUE, sort = TRUE, flip = TRUE) +
        theme(legend.position = "none") + ggtitle("Identity on x-axis")
b

DotPlot(Strict3,features = genes, assay = "SCT",cols = c("blue", "red"))+RotatedAxis()

saveRDS(Strict3, "endothelial.rds")
## extract meta data
mp <- Strict3@meta.data %>% as.data.table
# the resulting md object has one "row" per cell
## count the number of cells per unique combinations of "Sample" and "seurat_clusters"
mp[, .N, by = c("orig.ident", "seurat_clusters")]
##     orig.ident seurat_clusters   N
##  1:        13S               4 199
##  2:        13S               1 343
##  3:        13S               2 194
##  4:        13S               0 351
##  5:        13S               6 121
##  6:        13S               3 350
##  7:        13S               5  93
##  8:        13S               7  24
##  9:        13V               0 307
## 10:        13V               1 186
## 11:        13V               4 113
## 12:        13V               2 114
## 13:        13V               6  86
## 14:        13V               5  39
## 15:        13V               3  61
## 16:        13V               7   7
## 17:         9S               2 323
## 18:         9S               1 313
## 19:         9S               0 474
## 20:         9S               3 235
## 21:         9S               4 145
## 22:         9S               5 142
## 23:         9S               6  34
## 24:         9S               7  26
## 25:         9V               0 734
## 26:         9V               4 136
## 27:         9V               3  84
## 28:         9V               1 204
## 29:         9V               5  76
## 30:         9V               2  97
## 31:         9V               6  40
## 32:         9V               7  34
## 33:        10S               2 368
## 34:        10S               0 502
## 35:        10S               3 412
## 36:        10S               1 336
## 37:        10S               4  76
## 38:        10S               5 218
## 39:        10S               6   8
## 40:        10S               7  10
## 41:        10V               0 179
## 42:        10V               3  52
## 43:        10V               5  17
## 44:        10V               1  58
## 45:        10V               6  12
## 46:        10V               4  53
## 47:        10V               2  41
## 48:        10V               7  11
## 49:        17S               0 390
## 50:        17S               2 396
## 51:        17S               3 265
## 52:        17S               4  88
## 53:        17S               5  81
## 54:        17S               1 349
## 55:        17S               6  13
## 56:        17S               7  15
## 57:        17V               0 358
## 58:        17V               1  78
## 59:        17V               3  51
## 60:        17V               7   1
## 61:        17V               4  38
## 62:        17V               2  33
## 63:        17V               5  30
## 64:        17V               6  12
##     orig.ident seurat_clusters   N
## with additional casting after the counting
mp[, .N, by = c("orig.ident", "sub.cluster")] %>% dcast(., orig.ident ~ sub.cluster, value.var = "N")
##    orig.ident 0_0 0_1   1   2   3   4   5   6  7
## 1:        10S  92 410 336 368 412  76 218   8 10
## 2:        10V 136  43  58  41  52  53  17  12 11
## 3:        13S  92 259 343 194 350 199  93 121 24
## 4:        13V 222  85 186 114  61 113  39  86  7
## 5:        17S 111 279 349 396 265  88  81  13 15
## 6:        17V 315  43  78  33  51  38  30  12  1
## 7:         9S  99 375 313 323 235 145 142  34 26
## 8:         9V 644  90 204  97  84 136  76  40 34
ggplot(Strict3@meta.data, aes(Strict3@meta.data$sub.cluster, fill = Strict3@meta.data$orig.ident))+geom_bar(stat="count")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))


summary(Strict.list[["Subcutaneous"]]@meta.data)

```r
saveRDS(Strict3, "endothelial.rds")
saveRDS(Strict3, "endothelial.rds")